/* Session setup: empty memory, reserve a temporary file name (local macro
   tempsave), and initialise the random-number seed from the global macro
   "seed", which must be defined by the calling script. */
clear all
tempfile tempsave
set seed $seed


/*---------*/
/*LOAD DATA*/
/*---------*/
/* The individual-level file is the master; the schools file is joined to it
   one-to-one on aid. No _merge variable is kept, and no observations are
   dropped on the basis of the match result. */
use "${temp}data_individual.dta", clear
merge 1:1 aid using "${temp}data_schools.dta", nogenerate

/*-------------*/
/*Define Macros*/
/*-------------*/

/*Key regressors*/
global interest pgs_edu f_over1

/*Controls, grouped into child-, family- and peer-level lists*/
global child 		gender age_home_w1 pc1-pc20 firstborn
global family 		edumother edufather bornus_mother bornus_father ///
					bartik_mean_mother bartik_mean_father ///
					bartik_sd_mother bartik_sd_father agemum rel
global peer 		peer_white peer_single peer_mum_educ peer_mig peer_sex

/*Outcome lists*/
global skills 		pvt risk pat health_sub health_obj intel consc extra agree neuro
global skills1 		pvt_lag risk2 pat2 health_sub1

/*Variable lists used in robustness exercises*/
global school 		retent track f_stri private teach_white teach_female
global factor 		f_over2 class teach_master teach_5years teach_new f_extend*
global invest 		inv auth2
global sesoth 		sesbelsky
global pgsoth 		pgs_bmi pgs_adh pgs_dep pgs_int pgs_smk pgs_slp
global va 			pvt_lag_res grade_sci_res grade_mat_res
global tpeer 		tpeer_white tpeer_single tpeer_mum_educ tpeer_sex tpeer_mig
global cpeer 		peer_grade_eng peer_grade_mat

/*Full list of variables to be standardized, assembled from the lists above.
  Note that it names the *_res variables in va, which do not exist yet at
  this point of the script.*/
global s_vars "$interest $child $family $peer"
global s_vars "$s_vars $skills $skills1"
global s_vars "$s_vars $school $factor $invest $sesoth $pgsoth $va $tpeer $cpeer"

/*Control vectors in factor-variable notation.
  controls_r : child and family controls only.
  controls   : controls_r followed by the peer-composition controls.*/
global controls_r 	c.gender##c.age_home_w1 c.(pc1-pc20) c.firstborn ///
					c.edumother c.edufather ///
					c.bornus_mother c.bornus_father ///
					c.bartik_mean_mother c.bartik_mean_father ///
					c.bartik_sd_mother c.bartik_sd_father ///
					c.agemum c.rel
global controls 	$controls_r ///
					c.peer_white c.peer_single c.peer_mum_educ c.peer_mig c.peer_sex

/*-------------*/
/*Define Sample*/
/*-------------*/
/* Each regression below is run silently and only for its estimation sample:
   e(sample) is stored as a 0/1 indicator (sample1-sample6). All use the
   weight w0, cluster on HsId, and restrict to ancestry=="European ancestry". */

/* sample1: HsImp1==0; absorbs w1state and HsId */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $invest ///
	if ancestry=="European ancestry" & HsImp1==0 [pweight=w0], ///
	absorb(w1state HsId) vce(cluster HsId)
generate sample1 = e(sample)

/* sample2: as sample1, additionally absorbing famid */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $invest ///
	if ancestry=="European ancestry" & HsImp1==0 [pweight=w0], ///
	absorb(w1state HsId famid) vce(cluster HsId)
generate sample2 = e(sample)

/* sample3: HsImp3==0 instead of HsImp1==0 */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $invest ///
	if ancestry=="European ancestry" & HsImp3==0 [pweight=w0], ///
	absorb(w1state HsId) vce(cluster HsId)
generate sample3 = e(sample)

/* sample4: HsImp4==0 instead of HsImp1==0 */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $invest ///
	if ancestry=="European ancestry" & HsImp4==0 [pweight=w0], ///
	absorb(w1state HsId) vce(cluster HsId)
generate sample4 = e(sample)

/* sample5: grade_w1<9, with lagged test score and grades as extra
   regressors; absorbs w1state only */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $invest ///
	pvt_lag grade_sci grade_mat grade_eng ///
	if ancestry=="European ancestry" & grade_w1<9 [pweight=w0], ///
	absorb(w1state) vce(cluster HsId)
generate sample5 = e(sample)

/* sample6: grade_w1<12, with the tpeer variables as extra regressors;
   absorbs w1state only */
quietly reghdfe eduyears c.pgs_edu##(c.f_over1) $controls $tpeer $invest ///
	if ancestry=="European ancestry" & grade_w1<12 [pweight=w0], ///
	absorb(w1state) vce(cluster HsId)
generate sample6 = e(sample)

/* sample7: estimation sample for respondents with grade_w1>=9, requiring the
   wave-specific skill measures (pat, pat2, risk, risk2, pvt, pvt_lag,
   health_sub, health_sub1) to be non-missing by including them as regressors.
   Stata evaluates missing numeric values as larger than any number, so a bare
   grade_w1>=9 would also admit observations whose grade is missing; the
   !missing() guard keeps only observations with a known grade of 9 or above. */
qui: reghdfe eduyears c.pgs_edu##(c.f_over1) ///
$controls $invest pat pat2 risk risk2 pvt pvt_lag health_sub health_sub1 if ancestry=="European ancestry" & grade_w1>=9 & !missing(grade_w1) 	[pw=w0] ///
, absorb(w1state)		cluster(HsId)
gen sample7=e(sample)

/*---------------------------*/
/*Define Additional Variables*/
/*---------------------------*/
// Support variables: two constants equal to 1 for every observation
generate RobSchlOther = 1
generate RobGeneOther = 1

// Predicted education: regress eduyears on the child/family controls within
// sample1 (weighted by w0, w1state absorbed, clustered on HsId) and store the
// linear prediction (xb) as pred for the sample1 observations.
// NOTE(review): per the reghdfe help, xb excludes the absorbed w1state
// effects (xbd would include them) - confirm this is the intended quantity.
reghdfe eduyears $controls_r if sample1==1 [aweight=w0], ///
	absorb(w1state) vce(cluster HsId)
predict pred if sample1==1, xb

// Residualize lagged ability: for each measure, regress it on pgs_edu and the
// child/family controls within sample5 (w1state absorbed, unweighted) and keep
// the residuals as <measure>_res. These are the variables listed in $va.
foreach outcome of varlist pvt_lag grade_sci grade_mat {
	reghdfe `outcome' pgs_edu $controls_r if sample5==1, ///
		absorb(w1state) residuals(`outcome'_res)
}

// Generate sibling PGIs
// pgs_sib is the mean pgs_edu of the OTHER members of the same famid.
// Procedure: number the members of each family, spread member i's pgs_edu to
// the whole family as pgs_edu_i, blank it out on member i's own row, and take
// the row mean over the remaining (non-missing) columns.
//
// Members are numbered in aid order. aid is the unique merge key, so the
// numbering - and the sibcount variable that is saved with the data - is
// reproducible across runs. With a plain "bysort famid:" the order within a
// family is arbitrary.
// NOTE(review): only members numbered 1-5 contribute; in any family with more
// than five members the rest are ignored - confirm no such families exist.
// NOTE(review): observations with missing famid are grouped together as one
// "family" by bysort - confirm famid is never missing here.
bysort famid (aid): gen sibcount=_n
forvalues i=1/5{
	// pgs_edu of member i on member i's row, missing on every other row
	gen pgs_edu_`i'=pgs_edu if sibcount==`i'
	// copy that value to every row of the family
	gcollapse (max) pgs_edu_`i', by(famid) merge replace
	// exclude a person's own score from their sibling mean
	replace pgs_edu_`i'=. if sibcount==`i'
}
gegen pgs_sib=rowmean(pgs_edu_*)
drop pgs_edu_*

// Write the estimation dataset
save "${data}data_estim.dta", replace
